#delimit ;

* Session setup. From here on every command and comment ends at a semicolon.
* Start with no data in memory and work from the replication folder, because
* the data and output paths used in this script are relative to it ;
clear ;
cd "replication" ;

* *************************************************************************** ;
* load data
* *************************************************************************** ;

* Baseline survey file. It is the source of the participant counts and the
* study-arm indicators used in the sections that follow ;
use "./data/baseline.dta", clear ;

* *************************************************************************** ;
* count number of study participants
* *************************************************************************** ;

* by wave: keep the number of baseline observations in each of the four
* waves in the locals Nwave1 through Nwave4 ;
foreach w of numlist 1/4 { ;
	count if wave == `w' ;
	local Nwave`w' = r(N) ;
} ;

* all waves: every observation currently in memory ;
local NwaveT = _N ;

* *************************************************************************** ;
* percentage allocation to each study arm 
* *************************************************************************** ;

* Study-arm indicator variables, and the suffix each arm gets in the names
* of the locals that hold its allocation percentage. The two lists are
* parallel: the n-th suffix belongs to the n-th indicator ;
local armvars healthonly healthandpay control ;
local armtags hee heec control ;

* by wave: share of the wave assigned to each arm, rounded to a whole
* percent and stored as text with an escaped percent sign for LaTeX ;
forvalues i = 1/4 { ;
	forvalues a = 1/3 { ;
		local armvar : word `a' of `armvars' ;
		local armtag : word `a' of `armtags' ;

		count if wave == `i' & `armvar' == 1 ;
		local pct_wave`i'_`armtag' = string(100*round(r(N)/`Nwave`i'', 0.01)) + "\%" ;
	} ;
} ;

* all waves: same calculation over the pooled sample ;
forvalues a = 1/3 { ;
	local armvar : word `a' of `armvars' ;
	local armtag : word `a' of `armtags' ;

	count if `armvar' == 1 ;
	local pct_waveT_`armtag' = string(100*round(r(N)/`NwaveT', 0.01)) + "\%" ;
} ;

* *************************************************************************** ;
* midline data, percent missing 
* *************************************************************************** ;

use "./data/midline.dta", clear ;

* by wave: one minus the ratio of midline observations to the baseline
* count of the same wave, as a whole percent with an escaped percent sign ;
foreach w of numlist 1/4 { ;
	count if wave == `w' ;
	local attrit_midline_wave`w' = string(100*(1-round(r(N)/`Nwave`w'', 0.01))) + "\%" ;
} ;

* all waves: every midline observation against the pooled baseline count ;
local attrit_midline_waveT = string(100*(1-round(_N/`NwaveT', 0.01))) + "\%" ;

* *************************************************************************** ;
* endline data, percent missing
* *************************************************************************** ;

use "./data/endline.dta", clear ;

* by wave: one minus the ratio of endline observations to the baseline
* count of the same wave, as a whole percent with an escaped percent sign ;
foreach w of numlist 1/4 { ;
	count if wave == `w' ;
	local attrit_endline_wave`w' = string(100*(1-round(r(N)/`Nwave`w'', 0.01))) + "\%" ;
} ;

* all waves: every endline observation against the pooled baseline count ;
local attrit_endline_waveT = string(100*(1-round(_N/`NwaveT', 0.01))) + "\%" ;

* *************************************************************************** ;
* prepare the LaTeX table output
* *************************************************************************** ;

* Write the table as a LaTeX fragment, one table row per line of the file.
* The rule and cell commands used below look like they come from the
* booktabs and makecell packages, so the including document presumably
* loads both (TODO confirm) ;

* Release the handle if an earlier, interrupted run left it open. Without
* this, file open stops with an error because the handle already exists ;
capture file close myfile ;

file open myfile using "./output/table-study-participants-by-wave-and-survey-attrition.tex", write replace ;

* Table preamble and the two header rows ;
file write myfile "\begin{table}[h!]" _n ;
file write myfile "\renewcommand{\arraystretch}{1.2}" _n ;
file write myfile "\caption{Study Participants by Wave and Survey Attrition (Missing Data)}" _n ;
file write myfile "\label{samplesize}" _n ;
file write myfile "\centering" _n ;
file write myfile "\begin{tabular}{lccccccc}" _n ;
file write myfile "\toprule & &  \multicolumn{3}{c}{\shortstack[c]{\% Allocation \\ Random Assignment}}& \multicolumn{3}{c}{\shortstack[c]{\% of Study Participants\\ with Missing Data}}  \\" _n ;
file write myfile "\cmidrule(lr){3-5} \cmidrule(lr){6-8} & \shortstack[c]{No. of Study \\ Participants} & \shortstack[c]{HEE} & \shortstack[c]{HEEC} & Control & Baseline & Midline & Endline \\" _n ;
file write myfile "\midrule" _n ;

* One row per wave, filled from the locals computed earlier in this script.
* The baseline column is a fixed zero percent rather than a computed value ;
forvalues i = 1/4 { ;
	file write myfile "Wave `i' & `Nwave`i'' & \makecell[c]{`pct_wave`i'_hee'} & \makecell[c]{`pct_wave`i'_heec'} & \makecell[c]{`pct_wave`i'_control'}    & 0\%  & \makecell[c]{`attrit_midline_wave`i''}  & \makecell[c]{`attrit_endline_wave`i''} \\" _n ;
} ;

* Pooled row, table notes and closing of the environments ;
file write myfile "\midrule" _n ;
file write myfile "Total  & `NwaveT' & \makecell[c]{`pct_waveT_hee'} & \makecell[c]{`pct_waveT_heec'} & \makecell[c]{`pct_waveT_control'} & 0\%  & \makecell[c]{`attrit_midline_waveT'}  & \makecell[c]{`attrit_endline_waveT'}  \\" _n ;
file write myfile "\bottomrule" _n ;
file write myfile "\multicolumn{8}{p{0.85\linewidth}}{\footnotesize \textit{Notes:} The study was conducted in four waves. Each wave refers to a separate iteration of participant recruitment, baseline, randomization, film screenings, midline, and endline. For each wave, the table indicates the number of study participants, their allocation to the study arms, and the percentage of study participants with missing data for each survey.}" _n ;
file write myfile "\end{tabular}" _n ;
file write myfile "\end{table}" _n ;

file close myfile ;

exit ;

